Loading packages

Pulling the most recent data from covidtracking.com…

## [1] "Retrieved  2020-11-30 19:52:56"
## [1] "Data as of 2020-11-30"

Narrowing data…

Adding state population data…

Building tables…

done

Tests & cases

my_data %>% 
  inner_join(code_names, by = "code") %>% 
  inner_join(st_pop_2019, by = "name") %>%
  filter(as_of >= Sys.Date() - days(7)) %>% 
  mutate(week_num = lubridate::week(as_of)) %>% 
  group_by(code) %>%
  summarize(pos_rate = sum(case_ch) / sum(test_ch), reg_2 = min(reg_2)) %>% 
  mutate(pos_rate = if_else(pos_rate < 0, 0, pos_rate)) %>% 
  filter(pos_rate < 0.05 | pos_rate > 0.10) %>% 
  ggplot(mapping = aes(x = reorder(code, pos_rate), y = pos_rate, fill = reg_2)) +
  geom_col() +
  geom_label(mapping = aes(label = code)) +
  labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates remain high in the Midwest and plains", subtitle = str_c("Week ending  ", max(my_data$as_of))) +
  theme(legend.title = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
## `summarise()` ungrouping output (override with `.groups` argument)

Tests & cases

my_data %>% 
  inner_join(code_names, by = "code") %>% 
  inner_join(st_pop_2019, by = "name") %>%
  filter(as_of >= Sys.Date() - days(7)) %>% 
  mutate(case_days = if_else(between(as_of, Sys.Date() - days(6), Sys.Date() - days(6)), 1, 0),
         death_days = if_else(as_of >= Sys.Date() - days(6), 1, 0)) %>% 
  group_by(code) %>%
  summarize(cfr = sum(death_ch * death_days) / sum(case_ch * case_days), reg_2 = min(reg_2)) %>% 
  ggplot(mapping = aes(x = reorder(code, cfr), y = cfr, fill = reg_2)) +
  geom_col() +
  geom_label(mapping = aes(label = code)) +
  labs(x = "Date", y = "Deaths as a share of confirmed cases", title = "Case fatality rate") +
  theme(legend.title = element_blank(), axis.title.x = element_blank(), axis.text.x = element_blank(), axis.ticks.x = element_blank())
## `summarise()` ungrouping output (override with `.groups` argument)

Tests & cases

my_data %>% 
  inner_join(code_names, by = "code") %>% 
  inner_join(st_pop_2019, by = "name") %>%
  filter(as_of >= Sys.Date() - weeks(4)) %>% 
  mutate(week_num = lubridate::week(as_of)) %>% 
  group_by(week_num, reg_2) %>%
  summarize(as_of = median(as_of), pos_rate = sum(case_ch) / sum(test_ch)) %>% 
  ggplot(mapping = aes(x = as_of, y = pos_rate, color = reg_2, fill = reg_2)) +
  geom_smooth(alpha = .05, span = .5) +
  labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates are trending upward", subtitle = str_c("Month ending  ", max(my_data$as_of))) +
  theme(legend.title = element_blank())
## `summarise()` regrouping output by 'week_num' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Tests & cases

my_data %>% 
  inner_join(code_names, by = "code") %>% 
  inner_join(st_pop_2019, by = "name") %>%
  filter(as_of >= Sys.Date() - months(1)) %>% 
  mutate(week_num = lubridate::week(as_of)) %>% 
  group_by(week_num, reg_2) %>%
  summarize(as_of = median(as_of), pos_rate = sum(case_ch) / sum(test_ch), wk_test = sum(test_ch)) %>% 
  ggplot(mapping = aes(x = as_of, y = pos_rate, color = reg_2, fill = reg_2)) +
  geom_smooth(span = .5) +
  geom_line() +
  labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates remain high in the plains and midwest", subtitle = str_c("Two months ending  ", max(my_data$as_of))) +
  theme(legend.title = element_blank())
## `summarise()` regrouping output by 'week_num' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

Tests & cases

my_data %>% 
  inner_join(code_names, by = "code") %>% 
  inner_join(st_pop_2019, by = "name") %>%
  filter(as_of >= Sys.Date() - months(1)) %>% 
  mutate(week_num = lubridate::week(as_of)) %>% 
  group_by(week_num, code) %>%
  summarize(pos_rate = sum(case_ch) / sum(test_ch),
            as_of = median(as_of),
            reg_2 = mode(reg_2)) %>% 
  group_by(code, week_num) %>% 
  mutate(y_label = if_else(as_of == median(as_of, na.rm = TRUE),
                           median(pos_rate, na.rm = TRUE),
                           NULL),
         month_pos = mean(pos_rate, na.rm = TRUE)) %>% 
  ungroup() %>% 
  ggplot(mapping = aes(x = as_of, y = pos_rate)) +
  geom_line(alpha = 0, span = .6, mapping = aes(color = reg_2)) +
  geom_label(mapping = aes(y = y_label, label = code), na.rm = TRUE) +
  labs(x = "Date", y = "Share of COVID-19 tests with positive results", title = "Positive rates remain high in some states", subtitle = str_c(Sys.Date() - months(1), " through ", max(my_data$as_of), "; Ordered by highest positive rate for the month"))
## `summarise()` regrouping output by 'week_num' (override with `.groups` argument)

# +
#   theme(legend.title = element_blank()) +
#  facet_wrap(facets = vars(reorder(code, desc(month_pos))), nrow = 5) +
#    theme(axis.text = element_blank(),
#        axis.ticks = element_blank(),
#  axis.title.x  = element_blank(),
# strip.text.x = element_blank(),
# legend.position = 0)

Adds 2016 presidential election results

results_2016 <- read_csv("results_2016.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   code = col_character(),
##   t_mgn = col_double()
## )

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>% 
  mutate(noreast = if_else(code %in% (pk), "Northeastern states", "All other states")) %>% 
  group_by(noreast, as_of) %>%
  summarize(us_cases = sum(cases, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE),
            us_cases_ch = sum(case_ch, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE),
            us_deaths = sum(death, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE),
            us_deaths_ch = sum(death_ch, na.rm = TRUE) / sum(pop_2019, na.rm = TRUE)) %>% 
  filter(us_cases > .000009) %>%
  mutate(since = min(as_of)) %>% 
  ggplot(mapping = aes(x = as_of, y = us_deaths_ch)) +
  geom_smooth(linetype = 0, alpha = .05, mapping = aes(fill = weekdays(as_of), color = noreast)) +
  geom_smooth(alpha = 0, size = 1, linetype = 2, mapping = aes(color = noreast)) +
  geom_smooth(color = "black") +
  theme(legend.position = "none") +
  labs(title = str_c(str_c(pk, collapse = ", "), " have switched w/others", sep = ""), subtitle = "Deaths per million since 10 cases per million", x = "date", y = "COVID-19 deaths")
## Joining, by = "code"
## Joining, by = "name"
## `summarise()` regrouping output by 'noreast' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

pop_dens <- read_csv("pop_density.csv") %>% 
  select(code, dens = Density, pop = Pop)
## 
## -- Column specification --------------------------------------------------------
## cols(
##   State = col_character(),
##   code = col_character(),
##   Density = col_double(),
##   Pop = col_double(),
##   LandArea = col_double()
## )
temp <- my_data %>%
  inner_join(pop_dens) %>% 
  filter(as_of == max(as_of, na.rm = TRUE)) %>% 
  mutate(death_pm = death / pop * 1E6,
         case_pm = cases / pop * 1E6,
         log_death_pm = log(death_pm, 10))
## Joining, by = "code"
bestfit_intercept <- lm(formula = temp$log_death_pm ~ temp$dens)[[1]][[1]]
bestfit_slope <- lm(formula = temp$log_death_pm ~ temp$dens)[[1]][[2]]
my_data %>% 
  inner_join(results_2016) %>%
  inner_join(pop_dens) %>%
  filter(as_of == max(as_of)) %>% 
  mutate(death_pm = death/pop * 1000000,
         log_death_pm = log(death_pm, base = 10)) %>%
  group_by(code) %>%
  mutate(hrc = (t_mgn < 0),
         tier = case_when(dens >= 485 ~ "F",
                          between(dens, 183, 485) ~ "E",
                          between(dens, 69, 183) ~ "D",
                          between(dens, 26, 69) ~ "C",
                          between(dens, 10, 26) ~ "B",
                          dens < 10 ~ "A")) %>% 
  ungroup() %>%
  ggplot(mapping = aes(x = dens, y = log_death_pm)) +
                       # , color = hrc, fill = hrc)) +
  geom_text_repel(mapping = aes(x = dens, y = log_death_pm, label = code, fill = NULL, color = hrc), position = position_jitter()) +
  # geom_smooth(linetype = 3, method = "lm", alpha = 0.1) +
  geom_abline(slope = bestfit_slope, intercept = bestfit_intercept, linetype = 3) +
  facet_wrap(vars(tier),  scales = "free_x", nrow = 2) +
  labs(x = "Population per square mile", y = "Log of COVID-19 deaths per million", title = "For many states, density is density", subtitle = str_c("States plotted by density and COVID-19 deaths along a best-fit line, as of ", case_growth$wk_ending[1])) +
  theme(legend.position = "none", strip.text = element_blank()) +
  ylim(0, 4)
## Joining, by = "code"
## Joining, by = "code"

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>% 
  inner_join(results_2016) %>% 
  mutate(day_of_week = weekdays(as_of)) %>% 
  group_by(code, day_of_week) %>%
  mutate(max_case_ch = max(case_ch),
         hi_date = if_else(case_ch == max_case_ch, as_of, NULL),
         case_ch_pm = as.integer(case_ch / pop_2019 * 1000000),
         pos_rate = case_ch / test_ch) %>% 
  filter(hi_date == max(as_of)) %>% 
  select(as_of, code, case_ch_pm, pos_rate) %>% 
  arrange(desc(case_ch_pm)) %>% 
  group_by(code) %>% 
  summarize(as_of = max(as_of), case_ch_pm = mean(case_ch_pm, na.rm = TRUE), pos_rate = max(pos_rate, na.rm = TRUE)) %>% 
  ggplot(mapping = aes(x = reorder(code, desc(case_ch_pm)), y = case_ch_pm, fill = pos_rate)) +
  labs(title = str_c("States with a record number of new cases week ending ", max(my_data$as_of)),
       y = "Daily new cases per million",
       x = "State") +
  geom_col() +
  geom_label(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "white") +
  theme(axis.text.x = element_text(angle = 90, size = 8))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`
## `summarise()` ungrouping output (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>% 
  inner_join(results_2016) %>% 
  mutate(day_of_week = weekdays(as_of)) %>% 
  group_by(code, day_of_week) %>%
  mutate(max_case_ch = max(case_ch),
         hi_date = if_else(case_ch == max_case_ch, as_of, NULL),
         case_ch_pm = as.integer(case_ch / pop_2019 * 1000000),
         pos_rate = case_ch / test_ch,
         is_hrc = (t_mgn < 0)) %>% 
  filter(hi_date == max(as_of)) %>% 
  select(as_of, code, case_ch_pm, pos_rate, is_hrc) %>% 
  arrange(desc(case_ch_pm)) %>% 
  group_by(code, is_hrc) %>% 
  summarize(as_of = max(as_of),
            case_ch_pm = mean(case_ch_pm, na.rm = TRUE),
            pos_rate = max(pos_rate, na.rm = TRUE)) %>% 
  ggplot(mapping = aes(x = reorder(code, desc(case_ch_pm)), y = case_ch_pm, fill = is_hrc)) +
  labs(title = str_c("States with a record number of new cases week ending ", max(my_data$as_of)),
       y = "Daily new cases per million",
       x = "State") +
  geom_col(color = "white") +
  geom_text(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "black", angle = 90) +
  theme(axis.text.x = element_text(angle = 90, size = 8))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`
## `summarise()` regrouping output by 'code' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>% 
  inner_join(results_2016) %>% 
  mutate(day_of_week = weekdays(as_of)) %>% 
  group_by(code, day_of_week, t_mgn) %>%
  mutate(max_case_ch = max(case_ch),
         hi_date = if_else(case_ch == max_case_ch, as_of, NULL),
         case_ch_pm = as.integer(case_ch / pop_2019 * 1000000),
         pos_rate = case_ch / test_ch,
         is_hrc = (t_mgn < 0)) %>% 
  filter(hi_date == max(as_of)) %>% 
  select(as_of, code, case_ch_pm, pos_rate, is_hrc) %>% 
  arrange(desc(case_ch_pm)) %>% 
  group_by(code, is_hrc) %>% 
  summarize(as_of = max(as_of), case_ch_pm = mean(case_ch_pm, na.rm = TRUE), pos_rate = max(pos_rate, na.rm = TRUE)) %>% 
  ggplot(mapping = aes(x = reorder(code, desc(case_ch_pm)), y = case_ch_pm, fill = pos_rate)) +
  labs(title = str_c("45 states had a record high number of new cases this week"),
       subtitle = str_c("week ending ", max(my_data$as_of)),
       y = "Daily new cases per million",
       x = "State") +
  geom_col() +
  geom_label(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "white") +
  theme(axis.text.x = element_text(angle = 90, size = 8))
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`, `t_mgn`
## `summarise()` regrouping output by 'code' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>% 
  inner_join(results_2016) %>% 
  mutate(day_of_week = weekdays(as_of)) %>% 
  group_by(code, day_of_week) %>%
  mutate(max_death_ch = max(death_ch),
         hi_date = if_else(death_ch == max_death_ch, as_of, NULL),
         death_ch_pm = as.integer(death_ch / pop_2019 * 1000000),
         cfr = death_ch / case_ch,
         is_hrc = (t_mgn < 0)) %>% 
  filter(hi_date == max(as_of)) %>% 
  select(as_of, code, death_ch_pm, cfr, is_hrc) %>% 
  arrange(desc(death_ch_pm)) %>% 
  group_by(code, is_hrc) %>% 
  summarize(as_of = max(as_of), death_ch_pm = mean(death_ch_pm, na.rm = TRUE), cfr = max(cfr, na.rm = TRUE)) %>% 
  ggplot(mapping = aes(x = reorder(code, desc(death_ch_pm)), y = death_ch_pm, fill = is_hrc)) +
  labs(title = str_c("States with a record number of deaths week ending ", max(my_data$as_of)),
       y = "Daily deaths per million",
       x = "State") +
  geom_col() +
  geom_label(mapping = aes(label = weekdays(as_of, abbreviate = TRUE)), color = "white")
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Adding missing grouping variables: `day_of_week`
## `summarise()` regrouping output by 'code' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_mo = month(as_of)) %>% 
  group_by(year_mo, code) %>% 
  summarize(case_ch_pm = 30.5 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 30.5 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            mo_of = median(ymd(str_c("2020", year_mo, "15", sep = "-")))) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(year_mo) %>% 
  top_n(n = 10, case_ch_pm) %>%
  ggplot(mapping = aes(x = mo_of, y = case_ch_pm, label = code, color = is_hrc, size = pos_rate)) +
  geom_label(direction = "y", segment.color = NA, position = position_jitter(width = 10, height = 60)) +
  theme(legend.position = "none") +
  scale_size(range = c(3, 6)) +
  labs(x = "month",
       y = "monthly cases per million",
       title = "Since June, red states have led in new cases per capita")
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_mo' (override with `.groups` argument)
## Warning: Ignoring unknown parameters: direction, segment.colour

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_mo = month(as_of)) %>% 
  group_by(year_mo, code) %>% 
  summarize(case_ch_pm = 30.5 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 30.5 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            mo_of = median(ymd(str_c("2020", year_mo, "1", sep = "-")))) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(year_mo) %>% 
  top_n(n = 10, death_ch_pm) %>%
  ggplot(mapping = aes(x = mo_of, y = death_ch_pm, label = code, color = is_hrc)) +
  geom_label(position = position_jitter(width = 10, height = 60), segment.color = NA) +
  theme(legend.position = "none") +
  labs(title = "Ten states with most per capita COVID-19 deaths by month",
       y = "Monthly COVID-19 deaths per million residents",
       x = "Date")
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_mo' (override with `.groups` argument)
## Warning: Ignoring unknown parameters: segment.colour

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_wk = week(as_of)) %>% 
  group_by(year_wk, code) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(wk_date) %>% 
  top_n(n = 3, death_ch_pm) %>%
  ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = is_hrc)) +
  geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
  theme(legend.position = "none") +
  labs(subtitle = str_c("Five states with most per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 deaths per million residents",
       x = "Date") +
  xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_wk = week(as_of)) %>% 
  group_by(year_wk, code) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(wk_date) %>% 
  top_n(n = 3, case_ch_pm) %>%
  ggplot(mapping = aes(x = wk_date, y = case_ch_pm, label = code, fill = is_hrc)) +
  geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
  theme(legend.position = "none") +
  labs(title = "Since June, states that voted for President Trump have led infection rates",
       subtitle = str_c("Three states with most new per capita COVID-19 cases by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly new COVID-19 cases per million residents",
       x = "Date") +
  xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  inner_join(pop_dens) %>% 
  mutate(year_wk = week(as_of)) %>% 
  group_by(year_wk, reg_2, code, dens) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(wk_date) %>% 
  top_n(n = 3, case_ch_pm) %>%
  mutate(wk_dens = mean(dens, na.rm = TRUE)) %>% 
  ggplot(mapping = aes(x = wk_date, y = case_ch_pm)) +
  geom_label(mapping = aes(label = code, fill = reg_2), na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
  geom_col(mapping = aes(y = wk_dens), alpha = .4) +
  labs(subtitle = str_c("Three states with most new per capita COVID-19 cases by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly new COVID-19 cases per million residents",
       x = "Date",
       legend = NULL,
       caption = "Gray bars represent mean population density of top states") +
  xlim(ymd(20200301), today()) +
  theme(legend.title = element_blank())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2', 'code' (override with `.groups` argument)
## Warning: Removed 1 rows containing missing values (position_stack).
## Warning: Removed 1 rows containing missing values (geom_col).

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_wk = week(as_of)) %>% 
  group_by(year_wk, reg_2, code) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(wk_date) %>% 
  top_n(n = 5, case_ch_pm) %>%
  ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = reg_2)) +
  geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
  theme(
    # legend.position = "none"
    legend.title = element_blank()) +
  labs(subtitle = str_c("Five states with most per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 deaths per million residents",
       x = "Date") +
  xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_wk = week(as_of)) %>% 
  group_by(year_wk, reg_2, code) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(wk_date) %>% 
  top_n(n = 5, case_ch_pm) %>%
  ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = reg_2)) +
  geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25)) +
  theme(
    # legend.position = "none"
    legend.title = element_blank()) +
  labs(subtitle = str_c("Five states with most per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 deaths per million residents",
       x = "Date") +
  xlim(ymd(20200301), today())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2' (override with `.groups` argument)

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  inner_join(pop_dens) %>% 
  mutate(year_wk = week(as_of)) %>% 
  group_by(year_wk, reg_2, code, dens) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  group_by(wk_date) %>% 
  top_n(n = 3, case_ch_pm) %>%
  ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = dens)) +
  geom_label(na.rm = TRUE, position = position_jitter(width = 3, height = 25), color = "white") +
  # theme(legend.position = "none") +
  labs(subtitle = str_c("Five states with most per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 deaths per million residents",
       x = "Date",
       legend = "Population density in pop. per sq. mi.") +
  xlim(ymd(20200301), today()) +
  theme_bw()
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk', 'reg_2', 'code' (override with `.groups` argument)

Determine weekday factors

wday_fact <- my_data %>% 
  filter(as_of >= today() - months(1)) %>% 
  mutate(wk_num = week(as_of),
         wkday = lubridate::wday(as_of),
         case_ch = if_else(case_ch == 0, 1L, case_ch),
         death_ch = if_else(death_ch == 0, 1L, death_ch)) %>% 
  group_by(code, wk_num) %>% 
  mutate(wday_case_local = case_ch / mean(case_ch, na.rm = TRUE),
         wday_death_local = death_ch / mean(death_ch, na.rm = TRUE)) %>% 
  ungroup() %>% 
  group_by(code, wkday) %>% 
  summarize(wday_case_global = mean(wday_case_local, na.rm = TRUE),
         wday_death_global = mean(wday_death_local, na.rm = TRUE))
## `summarise()` regrouping output by 'code' (override with `.groups` argument)
my_data %>% 
  mutate(wkday = lubridate::wday(as_of)) %>% 
  inner_join(wday_fact) %>% 
  filter(code == "CA", as_of > today() - weeks(2)) %>% 
  mutate(case_ch_adj = case_ch / wday_case_global,
         death_ch_adj = death_ch / wday_death_global,
         t_day = wkday + 1) %>% 
  mutate(day_name = lubridate::wday(x = t_day, label = TRUE)) %>%
  ggplot(mapping = aes(x = as_of)) +
  geom_line(mapping = aes(y = case_ch), color = "gray", size = 3) +
  geom_line(mapping = aes(y = case_ch_adj))
## Joining, by = c("code", "wkday")

my_data %>% 
  inner_join(wday_fact) %>% 
  inner_join(results_2016) %>% 
  group_by(code) %>%
  mutate(case_ch_adj = wday_case_global * case_ch,
         death_ch_adj = wday_death_global * death_ch,
         t_day = wkday + 1) %>% 
  filter(case_ch_adj == max(case_ch_adj, na.rm = TRUE) &
         as_of == max(as_of)) %>%
  mutate(day_name = lubridate::wday(x = t_day, label = TRUE)) %>% 
  ggplot(mapping = aes(x = code, y = case_ch_adj)) +
  geom_col() +
  geom_label(mapping = aes(label = day_name)) +
  labs(subtitle = "States with record adjusted new cases reported today")
## Joining, by = "code"
## Joining, by = "code"

my_data %>% 
  inner_join(wday_fact) %>% 
  inner_join(results_2016) %>% 
  group_by(code) %>%
  mutate(case_ch_adj = wday_case_global * case_ch,
         death_ch_adj = wday_death_global * death_ch) %>% 
  filter(death_ch_adj == max(death_ch_adj, na.rm = TRUE) &
         as_of == max(as_of)) %>%
  mutate(day_name = lubridate::wday(wkday + 1, label = TRUE)) %>% 
  ggplot(mapping = aes(x = code, y = death_ch_adj)) +
  geom_col() +
  geom_label(mapping = aes(label = day_name)) +
  labs(subtitle = "States with record adjusted deaths reported today")
## Joining, by = "code"
## Joining, by = "code"

my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_wk = week(as_of)) %>% 
  filter(code %in% c("CA", "CT", "MI", "NJ", "NY")) %>% 
  group_by(year_wk, code) %>% 
  summarize(case_ch_pm = 7 * mean(case_ch / pop_2019 * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(death_ch / pop_2019 * 1000000, na.rm = TRUE),
            is_hrc = sum(t_mgn, na.rm = TRUE) < 0,
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0) %>% 
  ggplot(mapping = aes(x = wk_date, y = death_ch_pm, label = code, fill = code, color = code)) +
  labs(subtitle = str_c("Compare some states through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 deaths per million residents",
       x = "Date") +
  xlim(ymd(20200301), today()) +
  geom_smooth(alpha = 0.1, span = .2) +
  theme_fivethirtyeight() +
  theme(legend.title = element_blank())
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` regrouping output by 'year_wk' (override with `.groups` argument)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

nation_wk <- my_data %>% 
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>%
  inner_join(results_2016) %>% 
  mutate(year_wk = week(as_of)) %>% 
  # filter(code %in% c("CA")) %>% 
  group_by(year_wk) %>% 
  summarize(case_ch_pm = 7 * mean(sum(case_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
            death_ch_pm = 7 * mean(sum(death_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
            test_ch_pm = 7 * mean(sum(test_ch) / sum(pop_2019) * 1000000, na.rm = TRUE),
            pos_rate = sum(case_ch, na.rm = TRUE) / sum(test_ch, na.rm = TRUE),
            wk_date = min(as_of, na.rm = TRUE)) %>%
  filter(case_ch_pm > 0, death_ch_pm > 0)
## Joining, by = "code"
## Joining, by = "name"
## Joining, by = "code"
## `summarise()` ungrouping output (override with `.groups` argument)
nation_wk %>% 
  ggplot(mapping = aes(x = wk_date, y = case_ch_pm)) +
  labs(subtitle = str_c("Per capita COVID-19 cases by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 cases per million residents",
       x = "Date") +
  xlim(ymd(20200301), today()) +
  geom_smooth(span = 0.2, na.rm = TRUE)
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

nation_wk %>% 
  ggplot(mapping = aes(x = wk_date, y = death_ch_pm)) +
  labs(subtitle = str_c("Per capita COVID-19 deaths by week through", max(my_data$as_of, na.rm = TRUE), sep = " "),
       y = "Weekly COVID-19 deaths per million residents",
       x = "Date") +
  xlim(ymd(20200301), today()) +
  geom_line(na.rm = TRUE)

cor_data <- my_data %>%
  select(as_of, code, cases, death) %>% 
  filter(as_of > ymd("20200601"))

j <- data.table(code = character(), d_offset = integer(), cor_offset = double(), cfr = double())

st_vector <- code_names %>% select(code) %>% arrange(code) %>% pull(code)

for (k in st_vector) {
  
for (i in 1:28) {
j <- rbind(j,
           data.table(code = k,
                      d_offset = i,
                      cor_offset = cor_data %>%
                        filter(code == k) %>% 
                        mutate(d_lag = as_of + days(i)) %>%
                        left_join(cor_data[code == k], by = c("d_lag" = "as_of")) %>%
                        pull(-1) %>%
                        cor(x = ., y = cor_data[code == k]$cases, use = "complete.obs"),
           cfr = (cor_data[code == k & between(as_of, max(as_of) - 14, max(as_of))]$death) / (cor_data[code == k & between(as_of, max(as_of) -14 - i, max(as_of) - i)]$cases)))
}
  }
case_death <- j %>% 
  group_by(code) %>% 
  arrange(desc(cor_offset)) %>% 
  slice_head()
my_data %>%
  inner_join(case_death) %>%
  inner_join(code_names) %>% 
  inner_join(st_pop_2019) %>% 
  group_by(code) %>% 
  filter(between(as_of, max(as_of, na.rm = TRUE) - d_offset + 1, max(as_of, na.rm = TRUE) - d_offset + 14)) %>% 
  mutate(as_of_proj = as_of + d_offset,
         wk_proj = if_else(as_of <= max(as_of, na.rm = TRUE) - 7, str_c("Week starting ", max(my_data$as_of) + days(1)), str_c("Week starting ", max(my_data$as_of) + days(8))),
         death_pm_proj_day = case_ch * cfr / pop_2019 * 1E6,
         two_wk = sum(death_pm_proj_day)) %>%
  ungroup() %>% 
  group_by(code, wk_proj, two_wk) %>%
  summarize(death_pm_proj_wk = sum(death_pm_proj_day, na.rm = TRUE)) %>% 
  ggplot(mapping = aes(y = death_pm_proj_wk, x = wk_proj, label = code)) +
  geom_label_repel(color = "black", fill = "forestgreen")
## Joining, by = "code"
## Joining, by = "code"
## Joining, by = "name"
## `summarise()` regrouping output by 'code', 'wk_proj' (override with `.groups` argument)

case_death %>% 
  ggplot(mapping = aes(x = cfr)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.